*
* Purpose: Prepare applications for simulations
*

* Load the contract-level institution records and attach the capacity
* information; only records present in both files survive (keep(3)).
use eljaras karkod szaknev szakkod kepztelep szint munkarend fin_form ///
	felvett ponthatar jelszam using "${institutions_raw}", clear
merge 1:1 eljaras karkod szaknev szakkod kepztelep szint munkarend fin_form ///
	using "${temp_data_capacities}", keep(3) nogenerate

* Compute proportional capacities ----------------------------------------------
* felvett_program: largest admitted count observed within a program
* (karkod x szaknev x szint x munkarend x field_study).
bys karkod szaknev szint munkarend field_study: egen felvett_program = max(felvett)

* Default to the observed admitted count; use the official capacity only when
* it is strictly larger and neither "no capacity" flag is raised.
gen capacity_total = felvett_program
replace capacity_total = capacity_official ///
	if capacity_official > felvett_program ///
	& no_capacity_in_booklet != 1 & no_official_capacity != 1

drop felvett_program
save "${temp_data_institutions}", replace

* ------------------------------------------------------------------------------
* Load applicant-level records, restricted to procedure (eljaras) 1.
use id eljaras omkod nem oszttip3 ///
	targykod* targy*_jegy* ///
	tobblet_hh* tobblet_hhh* ///
	teltip8 tazon mkod2002 osszp* *hh* ///
	if eljaras == 1 using "${applicants_raw}", clear

* Switch the raw variable names to English ones used downstream.
rename (omkod nem oszttip3) (omid gender schooltype)
label variable omid "High-school identifier"

* Pull the grade-11 marks for three subjects out of the six generic subject
* slots (targy1..targy6). The slot's subject code (targykod) selects the
* subject: 1 -> hungarian, 2 -> mathematics, 3 -> history.
local grade 11
local code = 0
foreach subject in hungarian mathematics history {
	local ++code
	gen `subject'`grade' = .

	* If several slots carry the same code, the highest-numbered slot wins.
	forvalues slot = 1/6 {
		replace `subject'`grade' = targy`slot'_jegy`grade' if targykod`slot' == `code'
	}

	* A mark of 0 is treated as "no mark".
	replace `subject'`grade' = . if `subject'`grade' == 0
}

* NOTE: wide-format disadvantage dummies (disadv1/disadv2/disadv) used to be
* built here. They were never used and were discarded by the subsequent
* -keep-; the disadvantage flags that reach the output are computed from the
* long-format tobblet_hh/tobblet_hhh variables after the reshape.

* Labels for the grade-11 subject marks.
label variable hungarian11 "Hungarian literature and grammar"
label variable mathematics11 "Mathematics"
label variable history11 "History"

* Simple average of the three marks; missing if any of the three is missing.
gen GPA11_3 = (hungarian11 + mathematics11 + history11)/3
label variable GPA11_3 "GPA (0--5, Hungarian, math, history)"

* When the settlement type is unknown but a settlement id (tazon) exists,
* assign type 8, which falls into the "village" category below (> 3).
replace teltip8 = 8 if teltip8 == . & tazon != .

* Settlement-type dummies; left missing whenever teltip8 is missing.
gen budapest    = (teltip8 == 1) if teltip8 != .
gen county_town = (teltip8 == 2) if teltip8 != .
gen town        = (teltip8 == 3) if teltip8 != .
gen village     = (teltip8 > 3)  if teltip8 != .

label variable budapest "Budapest"
label variable county_town "County town"
label variable town "Town"
label variable village "Village"

* Indicator for applicants whose settlement type is still unknown.
gen settlement_missing = teltip8 == .
label variable settlement_missing "Settlement missing"

* Keep only what is needed downstream (variable order in the data is unchanged).
keep id eljaras omid mkod2002 tazon teltip8 gender schooltype ///
	hungarian11 mathematics11 history11 GPA11_3 ///
	budapest county_town town village settlement_missing ///
	osszp* *hh*

* School-level SES data; applicants without a match are retained.
merge m:1 omid using "${ses_2008_2012}", keep(1 3) nogenerate
gen year = 2007

* Slot 54 of the two bonus-point variables is stored as text with the
* placeholder "n.i."; blank the placeholder and convert to numeric so the
* slot can be reshaped together with the others.
foreach bonus in tobblet_hh54 tobblet_hhh54 {
	replace `bonus' = "" if `bonus' == "n.i."
	destring `bonus', replace
}

* One row per applicant (id) and list position (jelsorsz).
reshape long osszpont tobblet_hh tobblet_hhh, i(id) j(jelsorsz)

* List positions without a score are dropped.
drop if osszpont == .

* Settlement-level income for the applicant's settlement and year; applicants
* without a match are retained.
merge m:1 tazon year using "${settlement_level_income}", keep(1 3) nogenerate


* Attach the raw application record for every (applicant, list position);
* only pairs found in both sources are kept.
merge 1:1 eljaras id jelsorsz using "${application_raw}", keep(3) nogenerate

drop Stem ev jelsorsz0

* Bring in all contract-level variables prepared earlier (capacities, cutoffs,
* admitted counts, ...). Applications without a matching contract are kept.
merge m:1 eljaras karkod szaknev szint munkarend fin_form field_study ///
	using "${temp_data_institutions}", keep(1 3) nogenerate

* rejected: the application is ranked strictly above the admitting rank, or
* the applicant was not admitted anywhere (felv_sorsz1 missing).
gen rejected = jelsorsz < felv_sorsz1 | felv_sorsz1 == .
label variable rejected "Rejected by the contract (dummy)"
* admitted: the application sits exactly at the admitting rank.
gen admitted = jelsorsz == felv_sorsz1
label variable admitted "Admitted to the contract (dummy)"

* Drop leftover bookkeeping variables if present. -capture drop a b c- is
* all-or-nothing: one absent name (ev and Stem were already dropped above)
* aborts the whole command silently and leaves eljaras in the data, so each
* variable is dropped on its own.
* NOTE(review): this now really removes eljaras from the output - confirm that
* nothing downstream still reads it.
foreach v in ev eljaras Stem {
	capture drop `v'
}

* English names for the application-level variables.
rename (jelsorsz osszpont osszjel fin_form ponthatar felv_sorsz1) ///
	(rank priorityscore rollength funding priorityscore_cutoff rank_of_admission)
label variable rank "Rank on ROL"
label variable priorityscore "Priority score"
label variable rollength "Length of ROL (contracts, admin)"

* Applicant-level disadvantage flags, derived from the smallest bonus code the
* applicant has across all of their applications.
* NOTE(review): using the minimum means an applicant is flagged only when the
* smallest non-missing code equals 8 (resp. 4) - confirm this is intended
* rather than "any application carries the code".
tempvar min_hhh min_hh

bys id: egen `min_hhh' = min(tobblet_hhh)
gen disadv2 = (`min_hhh' == 8)
label variable disadv2 "Disadvantaged student (severe)"

* The normal flag also covers everyone with the severe flag.
bys id: egen `min_hh' = min(tobblet_hh)
gen disadv1 = (`min_hh' == 4) | (disadv2 == 1)
label variable disadv1 "Disadvantaged student (normal)"

drop `min_hhh' `min_hh'
drop tobblet*

* Numeric identifiers for contracts and programs.
* A contract is one distinct combination of
*   karkod x szaknev x szint x munkarend x field_study x funding;
* a program is the same key without funding.
tempvar first_in_contract

* Flag the first row of every contract (data end up sorted by the contract key).
bys karkod szaknev szint munkarend field_study funding: ///
	gen byte `first_in_contract' = (_n == 1)

* The running count of flagged rows numbers the contracts 1, 2, ... in key order.
gen long contract_id = sum(`first_in_contract')
drop `first_in_contract'

* A program is identified by the smallest contract id it contains.
bys karkod szaknev szint munkarend field_study: egen program_id = min(contract_id)

label variable program_id "Program id"
label variable contract_id "Contract id"
rename szint program_level

* Number of rejected applications per contract.
bys contract_id: egen rejected_students = total(rejected)
label variable rejected_students "Number of rejected students"

rename jelszam applicants
rename felvett admitted_applicants

label variable capacity "Capacity (tidy)"
label variable capacity_total "Total capacities per program"

* Helper script; the label applied right after it shows it is expected to
* leave a capacity_class variable in memory.
do "${dir_code_prepare_data}/helpers/addCapacityClass.do"

label variable capacity_class "Field-level funded capacity code"
* grossincome used to be labelled twice with conflicting text; only the label
* that was effective in the saved data is kept.
label variable grossincome "Gross income (1000USD, 2007 prices), settlement-level"
  
* County code of the program's training location (kepztelep), grouped by code.
* Locations not listed below keep a missing county.
* The codes are displayed through the megyenev value label, which is not
* defined in this file (presumably it arrives with one of the merged datasets).
gen program_county = .
replace program_county = 1  if kepztelep == "Budapest"
replace program_county = 2  if kepztelep == "Pécs"
replace program_county = 3  if inlist(kepztelep, "Baja", "Bácsalmás", "Kalocsa", "Kecskemét")
replace program_county = 4  if inlist(kepztelep, "Békéscsaba", "Gyula", "Orosháza", "Szarvas")
replace program_county = 5  if inlist(kepztelep, "Miskolc", "Sárospatak", "Ózd")
replace program_county = 6  if inlist(kepztelep, "Hódmezővásárhely", "Szeged")
replace program_county = 7  if inlist(kepztelep, "Dunaújváros", "Székesfehérvár")
replace program_county = 8  if inlist(kepztelep, "Győr", "Mosonmagyaróvár", "Sopron")
replace program_county = 9  if inlist(kepztelep, "Debrecen", "Hajdúböszörmény", "Püspökladány")
replace program_county = 10 if inlist(kepztelep, "Eger", "Gyöngyös")
replace program_county = 11 if inlist(kepztelep, "Esztergom", "Tatabánya")
replace program_county = 12 if kepztelep == "Salgótarján"
replace program_county = 13 if inlist(kepztelep, "Gödöllő", "Nagykőrös", "Piliscsaba", "Vác")
replace program_county = 14 if inlist(kepztelep, "Kaposvár", "Siófok")
replace program_county = 15 if kepztelep == "Nyíregyháza"
replace program_county = 16 if inlist(kepztelep, "Jászberény", "Mezőtúr", "Szolnok")
replace program_county = 17 if kepztelep == "Szekszárd"
replace program_county = 18 if kepztelep == "Szombathely"
replace program_county = 19 if inlist(kepztelep, "Pápa", "Veszprém")
replace program_county = 20 if inlist(kepztelep, "Keszthely", "Nagykanizsa", "Zalaegerszeg")
label values program_county megyenev

* dual = 1 for programs in field_study 5 or 8 offered with munkarend 2 or 3,
* 0 otherwise.
gen dual = inlist(field_study, 5, 8) & inlist(munkarend, 2, 3)

* Shrink storage types, then write the final data set in Stata and CSV format.
compress
save "${data_applications}", replace
export delimited using "${data_applications_csv}", replace

* The intermediate institutions file is no longer needed.
erase "${temp_data_institutions}"

